Libraries¶

# Calculus
import pandas as pd

import os

# Data viz
import plotly.express as px
import plotly.io as pio
pio.renderers.default='notebook'

Brief¶

  • numbers of crimes
    • per category
    • per location

Getting the data¶

columns_keep = [
#'division_number',
'date_reported',
'date_occurred',
#'area',
'area_name',
#'reporting_district',
#'part',
#'crime_code',
'crime_description',
#'modus_operandi',
'victim_age',
'victim_sex',
'victim_descent',
#'premise_code',
'premise_description',
#'weapon_code',
'weapon_description',
#'status',
'status_description',
#'crime_code_1',
#'crime_code_2',
#'crime_code_3',
#'crime_code_4',
'location',
#'cross_street',
'latitude',
'longitude',
]
# fetch all the data from the raw_data folder
current_dir = os.getcwd()
current_dir

file_path = os.path.join(current_dir, '..', 'raw_data', 'data.csv')
df = pd.read_csv(file_path)
df = df[columns_keep]
df['counter']=1

# Dates
df['date_occurred'] = pd.to_datetime(df['date_occurred'], errors='coerce')
df['year_occurred'] = df['date_occurred'].dt.year
df['month_occurred'] = df['date_occurred'].dt.month
df['hour_occurred'] = df['date_occurred'].dt.hour

df.head(3)
date_reported date_occurred area_name crime_description victim_age victim_sex victim_descent premise_description weapon_description status_description location latitude longitude counter year_occurred month_occurred hour_occurred
0 2020-01-08 2020-01-08 22:30:00 Southwest BATTERY - SIMPLE ASSAULT 36 F B SINGLE FAMILY DWELLING STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE) Adult Other 1100 W 39TH PL 34.0141 -118.2978 1 2020 1 22
1 2020-01-02 2020-01-01 03:30:00 Central BATTERY - SIMPLE ASSAULT 25 M H SIDEWALK UNKNOWN WEAPON/OTHER WEAPON Invest Cont 700 S HILL ST 34.0459 -118.2545 1 2020 1 3
2 2020-04-14 2020-02-13 12:00:00 Central SEX OFFENDER REGISTRANT OUT OF COMPLIANCE 0 X X POLICE FACILITY NaN Adult Arrest 200 E 6TH ST 34.0448 -118.2474 1 2020 2 12

Checking the data¶

df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 852950 entries, 0 to 852949
Data columns (total 17 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   date_reported        852950 non-null  object        
 1   date_occurred        852950 non-null  datetime64[ns]
 2   area_name            852950 non-null  object        
 3   crime_description    852950 non-null  object        
 4   victim_age           852950 non-null  int64         
 5   victim_sex           740344 non-null  object        
 6   victim_descent       740336 non-null  object        
 7   premise_description  852432 non-null  object        
 8   weapon_description   296748 non-null  object        
 9   status_description   852950 non-null  object        
 10  location             852950 non-null  object        
 11  latitude             852950 non-null  float64       
 12  longitude            852950 non-null  float64       
 13  counter              852950 non-null  int64         
 14  year_occurred        852950 non-null  int64         
 15  month_occurred       852950 non-null  int64         
 16  hour_occurred        852950 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(5), object(9)
memory usage: 110.6+ MB
null_counts = df.isnull().sum()
# Convert to DataFrame for better visualization
null_counts_df = pd.DataFrame(null_counts, columns=['Number of Nulls'])
null_counts_df = null_counts_df[null_counts_df['Number of Nulls']>0]
null_counts_df['Nulls_share'] = null_counts_df['Number of Nulls'] / len(df)
null_counts_df['Nulls_share'] =null_counts_df['Nulls_share'] .round(2)
null_counts_df =null_counts_df.sort_values(by='Nulls_share',ascending=False)
null_counts_df
Number of Nulls Nulls_share
weapon_description 556202 0.65
victim_sex 112606 0.13
victim_descent 112614 0.13
premise_description 518 0.00
df.describe()
date_occurred victim_age latitude longitude counter year_occurred month_occurred hour_occurred
count 852950 852950.000000 852950.000000 852950.000000 852950.0 852950.000000 852950.000000 852950.000000
mean 2022-01-09 05:17:37.460835840 29.742191 33.983232 -118.040106 1.0 2021.532406 6.419488 13.185609
min 2020-01-01 00:01:00 -3.000000 0.000000 -118.667600 1.0 2020.000000 1.000000 0.000000
25% 2021-01-26 15:00:00 5.000000 34.014100 -118.429700 1.0 2021.000000 4.000000 9.000000
50% 2022-01-30 12:42:00 31.000000 34.058500 -118.321500 1.0 2022.000000 6.000000 14.000000
75% 2022-12-25 15:00:00 45.000000 34.163200 -118.273900 1.0 2022.000000 9.000000 19.000000
max 2023-12-04 23:00:00 120.000000 34.334300 0.000000 1.0 2023.000000 12.000000 23.000000
std NaN 21.799470 1.756263 6.089068 0.0 1.099147 3.380271 6.535645

Crimes & victimes¶

Crime description¶

df.groupby('crime_description')['counter'].count().reset_index().sort_values(by='counter',ascending=False).head(5)
crime_description counter
132 VEHICLE - STOLEN 91473
4 BATTERY - SIMPLE ASSAULT 67976
115 THEFT OF IDENTITY 53467
21 BURGLARY FROM VEHICLE 52611
20 BURGLARY 51961

Premise description¶

df.groupby('premise_description')['counter'].count().reset_index().sort_values(by='counter',ascending=False).head(10)
premise_description counter
266 STREET 216018
255 SINGLE FAMILY DWELLING 144367
207 MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC) 104461
227 PARKING LOT 59551
218 OTHER BUSINESS 40412
254 SIDEWALK 36700
298 VEHICLE, PASSENGER/TRUCK 25224
79 GARAGE/CARPORT 16673
63 DRIVEWAY 13838
246 RESTAURANT/FAST FOOD 10841

Weapon description¶

#weapon_description

df.groupby('weapon_description')['counter'].count().reset_index().sort_values(by='counter',ascending=False).head(10)
weapon_description counter
65 STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE) 159021
75 UNKNOWN WEAPON/OTHER WEAPON 31728
78 VERBAL THREAT 21767
27 HAND GUN 18330
58 SEMI-AUTOMATIC PISTOL 6657
32 KNIFE WITH BLADE 6INCHES OR LESS 6262
73 UNKNOWN FIREARM 6011
44 OTHER KNIFE 5361
39 MACE/PEPPER SPRAY 3390
77 VEHICLE 2979

Status description¶

#weapon_description

df.groupby('status_description')['counter'].count().reset_index().sort_values(by='counter',ascending=False).head(10)
status_description counter
2 Invest Cont 683107
1 Adult Other 91576
0 Adult Arrest 74004
3 Juv Arrest 2775
4 Juv Other 1484
5 UNK 4

Place¶

df.groupby('area_name')['counter'].count().reset_index().sort_values(by='counter',ascending=False).head(10)
area_name counter
1 Central 57691
0 77th Street 53631
12 Pacific 49850
15 Southwest 47858
6 Hollywood 45018
14 Southeast 43357
11 Olympic 42979
9 Newton 42743
8 N Hollywood 42455
20 Wilshire 40648

Crimes per place and years evolution¶

df.groupby(['area_name','year_occurred'])['counter'].count().reset_index().sort_values(by='counter',ascending=False).head(10)
area_name year_occurred counter
6 Central 2022 17673
7 Central 2023 15273
2 77th Street 2022 14532
62 Southwest 2022 13371
0 77th Street 2020 13309
5 Central 2021 13156
1 77th Street 2021 13074
50 Pacific 2022 13013
49 Pacific 2021 12917
3 77th Street 2023 12716

Crime evolution per place¶

df_evo = df.query(" year_occurred in (2021, 2023)  ").pivot_table(index='area_name', columns='year_occurred',aggfunc='sum',values='counter').reset_index()
df_evo.columns=['area_name','2021','2023']
df_evo.head()
area_name 2021 2023
0 77th Street 13074 12716
1 Central 13156 15273
2 Devonshire 8487 8665
3 Foothill 7032 6418
4 Harbor 8858 8212
import pandas as pd
import plotly.express as px

# Assuming df_evo is your DataFrame and it's already defined

# Calculate the evolution from 2021 to 2023
df_evo['evol'] = df_evo['2023'].astype(float) / df_evo['2021'].astype(float) - 1
df_evo['evol'] = df_evo['evol'].round(2)

# Sort the DataFrame based on the evolution
df_evo = df_evo.sort_values(by='evol')

# Categorize the evolution into colors
df_evo['colors'] = df_evo['evol'].apply(lambda x:
                                        'red' if x > 0.05
                                        else ('green' if x < -0.05
                                        else 'grey'))

# Define specific colors for each category
color_map = {
    'green': '#32CD32',  # Light green for positive evolution
    'red': '#FF6347',    # Tomato red for negative evolution
    'grey': '#808080'    # Grey for neutral
}

# Create a scatter plot with specified colors and area names
fig = px.scatter(df_evo, x="evol", y="2023", color='colors', title='Evolution of 2023 Compared to 2021',
                 color_discrete_map=color_map,
                 text='area_name')  # Use 'area_name' instead of 'areaname'

# Set all markers to have grey borders and 70% opacity
fig.update_traces(marker=dict(size=12, line=dict(width=1, color='grey'), opacity=0.7))

# Improve layout for better readability
fig.update_traces(textposition='top center')
fig.update_layout(
    showlegend=False,
    plot_bgcolor='white',  # Set plot background to white
    paper_bgcolor='white'  # Set overall figure background to white
)

# Show the plot
fig.show('notebook')

Central seems to be the most dangerous place, where crimes are also growing a lot.

fig = px.line(df.groupby(['area_name','year_occurred'])['counter'].count().reset_index().sort_values(by=['area_name','year_occurred'],ascending=False)
              , x="year_occurred"
                , y="counter"
                ,color = 'area_name'
                , title='Crime evolution per place'
                ,markers=True
                )
fig.show('notebook')

Victim age¶

df.groupby('victim_age')['counter'].count().reset_index().sort_values(by='counter',ascending=False).head(5)
victim_age counter
3 0 211842
32 30 19421
37 35 19008
33 31 18603
31 29 18552
import plotly.express as px

# Assuming df is your DataFrame and it's already defined
fig = px.histogram(df, x="victim_age", title='Distribution of victims per age')

# Calculate the number of victims with age 0
num_victims_no_age = len(df[df['victim_age'] == 0])

# Add an annotation with an arrow
fig.add_annotation(
    x=0,  # Specifies the x-coordinate of the arrow's head
    y=max(df['victim_age'].value_counts()),  # Adjust this value as needed for y-coordinate
    text=f"{num_victims_no_age} victims don't have an attributed age",  # The text of the annotation
    showarrow=True,  # Show an arrow pointing to the specified coordinates
    arrowhead=1  # The style of the arrowhead (1 is a simple arrow)
)

# Show the figure
fig.show('notebook')
# Assuming df is your DataFrame and it's already defined
fig = px.histogram( df.query(" victim_sex=='F' ")
                   , x="victim_age"
                   , color = 'victim_sex'
                   ,title='Distribution of female victims per age')



# Show the figure
fig.show('notebook')
# Assuming df is your DataFrame and it's already defined
fig = px.histogram( df.query(" victim_sex=='M' ")
                   , x="victim_age"
                   , color = 'victim_sex'
                   ,title='Distribution of males victims per age')



# Show the figure
fig.show('notebook')

Victim sex¶

df.groupby('victim_sex')['counter'].count().reset_index().sort_values(by='counter',ascending=False).head(5)
victim_sex counter
3 M 351362
1 F 313468
4 X 75420
2 H 93
0 - 1

Victim descent¶

df.groupby('victim_descent')['counter'].count().reset_index().sort_values(by='counter',ascending=False).head(5)
victim_descent counter
7 H 261145
17 W 173440
2 B 120896
18 X 83214
12 O 67532

Victims sex / descent¶

df_victims = df.pivot_table( index='victim_descent',columns='victim_sex',aggfunc='sum',values='counter').reset_index()
df_victims = df_victims[['victim_descent','M','F']]
df_victims=df_victims.fillna(0)
df_victims['total']=df_victims['M']+df_victims['F']
df_victims = df_victims.sort_values(by='total',ascending=False)

df_victims['female_share'] = df_victims['F']  / df_victims['total']
df_victims['female_share']=df_victims['female_share'].round(2)

df_victims.head(5)
victim_sex victim_descent M F total female_share
7 H 129647.0 131318.0 260965.0 0.50
17 W 100225.0 72995.0 173220.0 0.42
2 B 53081.0 67676.0 120757.0 0.56
12 O 43695.0 23741.0 67436.0 0.35
1 A 9657.0 9034.0 18691.0 0.48

Crimes by dates¶

df.head(1)
date_reported date_occurred area_name crime_description victim_age victim_sex victim_descent premise_description weapon_description status_description location latitude longitude counter year_occurred month_occurred hour_occurred
0 2020-01-08 2020-01-08 22:30:00 Southwest BATTERY - SIMPLE ASSAULT 36 F B SINGLE FAMILY DWELLING STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE) Adult Other 1100 W 39TH PL 34.0141 -118.2978 1 2020 1 22
fig = px.line(df.pivot_table(index='year_occurred' ,aggfunc='sum' , values='counter'  ).reset_index()
                , x="year_occurred"
                , y="counter"
                , title='Nb of crimes per year'
                ,markers=True
                )
fig.show('notebook')
df_grouped = df.pivot_table(index=['month_occurred'] ,aggfunc='sum' , values='counter'  ).reset_index()
df_grouped['counter_mean']=df_grouped['counter'] / len(df_grouped)
df_grouped
month_occurred counter counter_mean
0 1 73129 6094.083333
1 2 68679 5723.250000
2 3 71196 5933.000000
3 4 70299 5858.250000
4 5 73349 6112.416667
5 6 72891 6074.250000
6 7 75429 6285.750000
7 8 75072 6256.000000
8 9 72141 6011.750000
9 10 75464 6288.666667
10 11 69960 5830.000000
11 12 55341 4611.750000
fig = px.line(df_grouped
                , x="month_occurred"
                , y="counter_mean"
                #,color ='year_occurred'
                ,title='Crime Month Seasonality'
                ,markers=True
                )
fig.show('notebook')
fig = px.line(df.pivot_table(index=['year_occurred','month_occurred'] ,aggfunc='sum' , values='counter'  ).reset_index()
                , x="month_occurred"
                , y="counter"
                ,color ='year_occurred'
                ,title='Crime Month Seasonality'
                ,markers=True
                )
fig.show('notebook')
import plotly.express as px

# Assuming df is your DataFrame and it's already defined
fig = px.line(df.pivot_table(index=['year_occurred', 'month_occurred'], aggfunc='sum', values='counter').reset_index(),
              x="month_occurred",
              y="counter",
              color='year_occurred',
              title='Crime Month Seasonality',
              markers=True)

# Add an annotation with an arrow for December 2023
fig.add_annotation(
    x=12,  # x-coordinate for December
    y=df[(df['year_occurred'] == 2023) & (df['month_occurred'] == 12)]['counter'].sum(),  # y-coordinate for December 2023
    text="Missing data for December 2023",  # The annotation text
    showarrow=True,  # Show an arrow pointing to the specified coordinates
    arrowhead=1,  # The style of the arrowhead (1 is a simple arrow)
    ax=-120,  # Horizontal offset of the arrow (adjust as needed)
    ay=-50  # Vertical offset of the arrow (adjust as needed)
)

# Show the figure
fig.show('notebook')

Crimes correlations : What crimes correlate with each - others ?¶

# Assuming you have a DataFrame 'df' with columns 'area_name', 'counter', and 'crime_description'
df_crimes = df.pivot_table(index='area_name', aggfunc='sum', values='counter', columns='crime_description').reset_index()

# Set 'area_name' as the index
df_crimes = df_crimes.set_index('area_name')
import pandas as pd

# Assuming df_crimes is your DataFrame
# df_crimes = pd.read_csv('your_data.csv')  # Example to load data

# Calculate the correlation matrix
correlation_matrix = df_crimes.corr()

# Display the correlation matrix
correlation_matrix
crime_description ARSON ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT ATTEMPTED ROBBERY BATTERY - SIMPLE ASSAULT BATTERY ON A FIREFIGHTER BATTERY POLICE (SIMPLE) BATTERY WITH SEXUAL CONTACT BEASTIALITY, CRIME AGAINST NATURE SEXUAL ASSLT WITH ANIM BIGAMY ... UNAUTHORIZED COMPUTER ACCESS VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS) VANDALISM - MISDEAMEANOR ($399 OR UNDER) VEHICLE - ATTEMPT STOLEN VEHICLE - STOLEN VEHICLE, STOLEN - OTHER (MOTORIZED SCOOTERS, BIKES, ETC) VIOLATION OF COURT ORDER VIOLATION OF RESTRAINING ORDER VIOLATION OF TEMPORARY RESTRAINING ORDER WEAPONS POSSESSION/BOMBING
crime_description
ARSON 1.000000 0.640382 0.802754 0.877036 0.795099 0.414322 0.792373 0.416257 0.319015 -0.408223 ... -0.575377 0.762680 0.785109 0.394897 0.627737 0.012203 -0.448538 -0.021869 -0.582394 0.220270
ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER 0.640382 1.000000 0.558682 0.599608 0.674356 0.542911 0.737177 0.470088 0.203734 -0.227827 ... -0.452161 0.585272 0.516159 0.114434 0.181087 0.079398 -0.223461 -0.031021 -0.356584 0.038231
ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT 0.802754 0.558682 1.000000 0.921664 0.722539 0.267665 0.812042 0.191782 0.053449 -0.492927 ... -0.614649 0.488878 0.875467 0.604097 0.764485 0.004481 -0.276640 0.272391 -0.298845 0.282560
ATTEMPTED ROBBERY 0.877036 0.599608 0.921664 1.000000 0.879820 0.461832 0.827545 0.440890 0.361852 -0.538950 ... -0.534217 0.678861 0.807360 0.424522 0.608391 0.014359 -0.383709 0.117981 -0.351585 0.293896
BATTERY - SIMPLE ASSAULT 0.795099 0.674356 0.722539 0.879820 1.000000 0.666625 0.817322 0.749157 0.443872 -0.418397 ... -0.397360 0.864910 0.695500 0.250183 0.315457 0.182661 -0.527686 -0.083608 -0.382659 0.250179
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
VEHICLE, STOLEN - OTHER (MOTORIZED SCOOTERS, BIKES, ETC) 0.012203 0.079398 0.004481 0.014359 0.182661 0.201154 0.125646 0.360140 0.509747 0.983821 ... -0.029351 0.331106 0.237177 0.437324 0.112618 1.000000 -0.078017 -0.093430 -0.192871 0.273810
VIOLATION OF COURT ORDER -0.448538 -0.223461 -0.276640 -0.383709 -0.527686 -0.322065 -0.422450 -0.534585 0.017299 0.308828 ... 0.036071 -0.579071 -0.269040 0.042368 -0.014250 -0.078017 1.000000 0.278040 0.416653 -0.028942
VIOLATION OF RESTRAINING ORDER -0.021869 -0.031021 0.272391 0.117981 -0.083608 -0.261454 -0.058197 -0.435435 -0.625504 -0.146899 ... -0.496845 -0.287178 0.215279 0.412006 0.519665 -0.093430 0.278040 1.000000 -0.140528 0.325251
VIOLATION OF TEMPORARY RESTRAINING ORDER -0.582394 -0.356584 -0.298845 -0.351585 -0.382659 -0.197052 -0.416955 -0.239865 0.363688 -0.571571 ... 0.407601 -0.587476 -0.521707 -0.235402 -0.325704 -0.192871 0.416653 -0.140528 1.000000 -0.349798
WEAPONS POSSESSION/BOMBING 0.220270 0.038231 0.282560 0.293896 0.250179 0.212912 0.356509 0.195228 -0.722222 0.547723 ... -0.358772 0.308248 0.275263 0.256395 0.252524 0.273810 -0.028942 0.325251 -0.349798 1.000000

138 rows × 138 columns

Clusters¶

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Assuming you have a DataFrame 'df' with columns 'area_name', 'counter', and 'crime_description'
# Creating the df_crimes DataFrame
df_crimes = df.pivot_table(index='area_name', aggfunc='sum', values='counter', columns='crime_description')
df_crimes = df_crimes.fillna(0)

# Preparing the data for clustering
X = df_crimes.values  # Extracting the numerical values for clustering

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Applying K-Means Clustering
k = 5  # Number of clusters
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(X_scaled)

# Attaching the cluster labels to the original DataFrame
df_crimes['Cluster'] = kmeans.labels_

# Displaying the first few rows of the DataFrame with cluster labels
df_crimes.sort_values(by='Cluster')
C:\Users\antoi\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning:

The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning

crime_description ARSON ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT ATTEMPTED ROBBERY BATTERY - SIMPLE ASSAULT BATTERY ON A FIREFIGHTER BATTERY POLICE (SIMPLE) BATTERY WITH SEXUAL CONTACT BEASTIALITY, CRIME AGAINST NATURE SEXUAL ASSLT WITH ANIM BIGAMY ... VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS) VANDALISM - MISDEAMEANOR ($399 OR UNDER) VEHICLE - ATTEMPT STOLEN VEHICLE - STOLEN VEHICLE, STOLEN - OTHER (MOTORIZED SCOOTERS, BIKES, ETC) VIOLATION OF COURT ORDER VIOLATION OF RESTRAINING ORDER VIOLATION OF TEMPORARY RESTRAINING ORDER WEAPONS POSSESSION/BOMBING Cluster
area_name
Foothill 86.0 52.0 1562.0 93.0 2173.0 6.0 60.0 126.0 1.0 0.0 ... 1705.0 789.0 71.0 3585.0 7.0 370.0 474.0 44.0 2.0 0
Harbor 84.0 23.0 2104.0 153.0 2815.0 9.0 74.0 111.0 0.0 0.0 ... 2294.0 1182.0 235.0 4885.0 26.0 353.0 1096.0 17.0 3.0 0
Hollenbeck 95.0 59.0 2203.0 156.0 2885.0 10.0 80.0 116.0 0.0 0.0 ... 2092.0 1119.0 197.0 5029.0 3.0 368.0 694.0 45.0 1.0 0
Mission 92.0 35.0 1798.0 149.0 2297.0 6.0 78.0 177.0 0.0 0.0 ... 1960.0 801.0 108.0 4662.0 2.0 338.0 925.0 35.0 0.0 0
Newton 172.0 49.0 3503.0 339.0 3904.0 6.0 97.0 165.0 0.0 1.0 ... 2571.0 1159.0 181.0 6693.0 5.0 208.0 831.0 38.0 3.0 0
Southwest 104.0 77.0 3197.0 272.0 4065.0 7.0 127.0 218.0 0.0 0.0 ... 2787.0 1415.0 255.0 5204.0 546.0 331.0 743.0 45.0 3.0 0
Rampart 125.0 37.0 2883.0 329.0 3833.0 19.0 93.0 195.0 0.0 1.0 ... 2437.0 1128.0 179.0 4366.0 58.0 253.0 438.0 36.0 2.0 0
Central 199.0 151.0 3755.0 436.0 6276.0 41.0 300.0 369.0 2.0 1.0 ... 4029.0 1389.0 115.0 3755.0 118.0 133.0 260.0 9.0 2.0 1
77th Street 183.0 71.0 5334.0 447.0 4195.0 7.0 258.0 158.0 0.0 0.0 ... 2893.0 1602.0 238.0 7046.0 3.0 218.0 589.0 16.0 2.0 2
Southeast 149.0 82.0 4399.0 321.0 3500.0 10.0 202.0 116.0 1.0 0.0 ... 2255.0 1592.0 282.0 5825.0 1.0 264.0 690.0 36.0 0.0 2
West LA 73.0 5.0 722.0 94.0 2648.0 7.0 37.0 198.0 0.0 0.0 ... 2485.0 923.0 48.0 2831.0 162.0 163.0 297.0 17.0 1.0 3
Van Nuys 87.0 12.0 1467.0 107.0 2569.0 3.0 70.0 162.0 1.0 0.0 ... 2309.0 932.0 84.0 3636.0 32.0 343.0 385.0 33.0 0.0 3
Topanga 45.0 29.0 1417.0 112.0 2496.0 13.0 55.0 159.0 0.0 0.0 ... 1803.0 676.0 84.0 2704.0 16.0 215.0 652.0 89.0 1.0 3
Northeast 119.0 30.0 1541.0 185.0 2419.0 10.0 84.0 114.0 0.0 0.0 ... 2471.0 1048.0 140.0 4337.0 25.0 384.0 537.0 24.0 3.0 3
West Valley 75.0 28.0 1596.0 136.0 2457.0 14.0 64.0 179.0 2.0 0.0 ... 2112.0 819.0 117.0 3393.0 19.0 463.0 173.0 130.0 1.0 3
N Hollywood 127.0 32.0 1763.0 141.0 3036.0 5.0 85.0 190.0 0.0 1.0 ... 2816.0 1061.0 141.0 3960.0 48.0 87.0 102.0 26.0 1.0 3
Devonshire 47.0 13.0 1192.0 101.0 2361.0 4.0 56.0 147.0 0.0 0.0 ... 1716.0 671.0 77.0 3151.0 34.0 315.0 339.0 131.0 0.0 3
Wilshire 119.0 118.0 1550.0 160.0 2898.0 7.0 100.0 174.0 0.0 0.0 ... 2551.0 945.0 54.0 3326.0 47.0 307.0 354.0 11.0 1.0 3
Olympic 133.0 25.0 2470.0 280.0 4053.0 6.0 84.0 205.0 2.0 1.0 ... 2671.0 1186.0 146.0 4652.0 46.0 293.0 447.0 42.0 1.0 4
Hollywood 116.0 54.0 2540.0 270.0 4102.0 19.0 189.0 270.0 1.0 0.0 ... 3052.0 1022.0 62.0 3211.0 53.0 194.0 396.0 21.0 5.0 4
Pacific 119.0 32.0 1880.0 154.0 2994.0 20.0 139.0 216.0 0.0 2.0 ... 2817.0 1131.0 296.0 5222.0 544.0 255.0 327.0 10.0 3.0 4

21 rows × 139 columns

df_evo_2 = df_evo.merge(df_crimes,on='area_name')
df_evo_2.head(1)
area_name 2021 2023 evol colors ARSON ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT ATTEMPTED ROBBERY BATTERY - SIMPLE ASSAULT ... VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS) VANDALISM - MISDEAMEANOR ($399 OR UNDER) VEHICLE - ATTEMPT STOLEN VEHICLE - STOLEN VEHICLE, STOLEN - OTHER (MOTORIZED SCOOTERS, BIKES, ETC) VIOLATION OF COURT ORDER VIOLATION OF RESTRAINING ORDER VIOLATION OF TEMPORARY RESTRAINING ORDER WEAPONS POSSESSION/BOMBING Cluster
0 Hollywood 12084 10274 -0.15 green 116.0 54.0 2540.0 270.0 4102.0 ... 3052.0 1022.0 62.0 3211.0 53.0 194.0 396.0 21.0 5.0 4

1 rows × 144 columns

df_evo_2['Cluster']=df_evo_2['Cluster'].astype(str)
fig = px.scatter(df_evo_2
                 , x="evol", y="2023", color="Cluster",
                 size='2023', hover_data=['2023'])
fig.show('notebook')
import pandas as pd
import plotly.express as px

# Ensure df_evo_2 is correctly set up with 'evol', '2023', 'Cluster', and 'area_name' columns

# Convert Cluster to a string for color coding
df_evo_2['Cluster'] = df_evo_2['Cluster'].astype(str)

# Create the scatter plot
fig = px.scatter(df_evo_2, x="evol", y="2023", color="Cluster",
                 size='2023', hover_data=['area_name', '2023'],
                 text='area_name')  # Add area name as text labels on the plot

# Customize the layout
fig.update_traces(textposition='top center')
fig.update_layout(
    title="Evolution of 2023 Compared to 2021 by Cluster",
    xaxis_title="Evolution from 2021 to 2023",
    yaxis_title="Value in 2023",
    legend_title="Cluster",
    plot_bgcolor='white',
    paper_bgcolor='white'
)

# Customize hover template for better readability
fig.update_traces(hovertemplate="<br>".join([
    "Area: %{hoverdata[0]}",
    "2023 Value: %{hoverdata[1]}",
    "Evolution: %{x}",
    "Cluster: %{marker.color}"
]))

# Show the plot
fig.show('notebook')

THE END